import re
from lxml import etree
# Matches JD item links such as href="//item.jd.com/100012345.html" and
# captures the scheme-less URL ("item.jd.com/100012345.html").  The dots are
# escaped and the path may not contain quotes or whitespace, so a match can
# never run past the end of the attribute value or across lines.
_ITEM_LINK_RE = re.compile(r'href="//(item\.jd\.com/[^"\s]+?\.html)')


def extract_item_links(html):
    """Return the unique JD item URLs referenced in *html*.

    Args:
        html: Page source to scan for ``href="//item.jd.com/....html"``.

    Returns:
        A list of scheme-less URLs (``item.jd.com/<path>.html``) with
        duplicates removed, in order of first appearance.
    """
    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # output is deterministic (a plain set is not, due to hash randomization).
    return list(dict.fromkeys(_ITEM_LINK_RE.findall(html)))


def main(src_path='myhtml.txt', dst_path='html.txt'):
    """Append every unique item link found in *src_path* to *dst_path*.

    Args:
        src_path: UTF-8 text file containing the saved HTML.
        dst_path: UTF-8 text file that receives one link per line; it is
            opened in append mode, so existing content is preserved.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(src_path, 'r', encoding='utf-8') as src:
        html = src.read()

    links = extract_item_links(html)
    if not links:
        # Nothing to write; do not create or touch the output file.
        return

    # Open the output once instead of once per link.
    with open(dst_path, 'a', encoding='utf-8') as dst:
        dst.writelines(link + '\n' for link in links)


if __name__ == '__main__':
    main()


